* Interpret the commands below under Stata release 13.1.
version 13.1
* Do not pause output with --more-- prompts.
set more off
* Close a log that may still be open; -capture- ignores the error raised
* when no log is open.
capture log close
* Record this run in a log file, overwriting an earlier one.
log using "attc_sampling.log", replace

************************************************************
* Kuehn/Rohlfing: "Are there really two cultures"
*
* Replication code for generating random order of articles 
************************************************************

* Create the population file as a placeholder: 100 rows that hold only the
* all-missing variable -holder-, so there is a dataset to append to.
clear
set obs 100
generate holder = .
label data "Kuehn/Rohlfing: ATTC population of journal articles"
save attc_pop, replace
clear

* For every journal and each of its three periods: import the text file, tag
* its rows with the file stem, save it as a Stata dataset and stack it
* underneath the population collected so far.
local journal ajps apsr cps ejpr io wp
foreach jrnl of local journal {
	foreach period of numlist 1/3 {
		* File stem shared by the text file, the saved dataset and -ident-.
		local stem `jrnl'_`period'
		insheet using `stem'.txt, clear
		label data "Kuehn/Rohlfing: ATTC data, journal `jrnl', period `period'"
		generate ident = "`stem'"
		save `stem', replace
		* Reload the population first so the new rows are appended after it.
		use attc_pop, clear
		append using `stem', force
		save attc_pop, replace
	}
}

* Attach descriptive labels to the key variables of the pooled population.
label var ident "Identifier"
label var paradigm "Method based on screening"
label var usable "Fit based on screening"

* Remove auxiliary columns that are not needed in the population file.
* Each variable is dropped on its own: a single -drop- listing several names
* aborts without dropping anything as soon as one name is missing, and
* -capture- would silently hide that failure.
foreach junk in v12 v13 comments url paradigmhu randomid {
	* Only act on an exact name match, never on an abbreviation.
	capture confirm variable `junk', exact
	if _rc == 0 {
		drop `junk'
	}
}

* Discard rows without a publication year. This also removes the placeholder
* rows created with the empty population file, which carry no year.
drop if year == .
save attc_pop, replace

* Fix the seed so that the random ordering below is reproducible.
set seed 831
* Ask for up to 1000 articles within each ident-paradigm group.
sample 1000, count by(ident paradigm) 
* Setting the count this high is intended to retain every article while
* putting the articles in random order within each journal-period group.
* NOTE(review): assumes no group holds more than 1000 articles - confirm.

/* By accident, the file originally imported for ejpr_1 was identical to
ejpr_2. Importing the correct data for ejpr_1 instead changes the sample
order from ejpr_1 onward. Since we had started coding articles before we
noticed the mistake, we had to keep using the wrong import data here. To
derive a random ranking for the ejpr_1 articles, we handle that file
separately and, in a second step, replace the wrong data with the correct
data so that we end up with one integrated file. */

* Store the randomly ordered articles before any bookkeeping columns exist.
save attc_sample, replace

* Add the two bookkeeping columns, initially all missing.
generate random_id = .
generate download = .

* Number the articles consecutively within each ident-paradigm group, in the
* order in which the rows currently appear in that group.
bysort ident paradigm: replace random_id = _n

* Describe the new columns and the dataset, then overwrite the stored file.
label variable random_id "Sample ID per journal-period and paradigm"
label variable download "Empty variable for documenting article downloads"
label data "Kuehn/Rohlfing: ATTC random sample order"
save attc_sample, replace

* Individual handling of ejpr_1 data (see above)
* Import the corrected ejpr_1 data and tag every row with its identifier.
insheet using ejpr_1true.txt, clear
gen ident = "ejpr_1"
label var ident "Identifier"
label var paradigm "Method based on screening"
label var usable "Fit based on screening"

* Remove auxiliary columns. Each variable is dropped on its own: a single
* -drop- listing several names aborts without dropping anything as soon as
* one name is missing, and -capture- would silently hide that failure.
foreach junk in url paradigmhu randomid {
	* Only act on an exact name match, never on an abbreviation.
	capture confirm variable `junk', exact
	if _rc == 0 {
		drop `junk'
	}
}

* Keep rows with a paradigm code below 2. Missing values compare as larger
* than any number in Stata, so rows with a missing paradigm are dropped too.
keep if paradigm < 2
save ejpr_1true, replace

* Draw with the same seed as the main sample. The count is one less than the
* number of rows and is applied within each paradigm group.
* NOTE(review): if all remaining rows shared a single paradigm value, this
* count would drop one article - confirm that both values occur.
local max = _N-1
display `max'
set seed 831
sample `max', count by(paradigm)

* Recode the double issues "1-2" and "2-3" so that -issue- can be converted
* to a numeric variable.
replace issue = "1.2" if issue == "1-2"
replace issue = "2.3" if issue == "2-3"
destring issue, replace

* Number the articles consecutively within each paradigm group, in the order
* in which the rows currently appear in that group.
gen random_id = .
label var random_id "Sample ID per journal-period and paradigm"
gen download = . 
label var download "Empty variable for documenting article downloads"
bys paradigm: replace random_id = _n
save ejpr_1true, replace

* Replace the ejpr_1 rows of the stored sample with the separately ordered
* ejpr_1 rows.
use attc_sample, clear
drop if ident == "ejpr_1"
append using ejpr_1true

* The placeholder variable is no longer needed.
drop holder

* Arrange the rows by journal-period, paradigm and sample ID.
sort ident paradigm random_id

* Store the integrated sample as a Stata dataset and as a text file.
save attc_sample, replace
outsheet using "attc_sample.txt", replace

log close
